package spark.pipeline

import org.apache.spark.ml.clustering.{KMeans, KMeansModel}
import org.apache.spark.ml.evaluation.ClusteringEvaluator
import org.apache.spark.ml.linalg.{Vector, Vectors}
import org.apache.spark.sql.SparkSession
import spark.pipeline.IrisLogisticRegression.iris

/**
 * Clusters the iris dataset with Spark ML KMeans and prints the per-row
 * cluster assignment, the cluster centers, and the silhouette score.
 *
 * Usage: pass the CSV path as the first program argument; defaults to the
 * original hard-coded path when no argument is given.
 */
object IrisKMeans {
    // Row type for the parsed dataset: four numeric measurements + species label.
    case class iris(features : Vector, label : String)

    def main(args: Array[String]): Unit = {
        // Generalized: input file may be supplied as args(0); the original
        // hard-coded path remains the default for backward compatibility.
        val file = args.headOption
          .getOrElse("C:/Users/Lenovo/Desktop/Working/Python/data/iris.txt")

        val spark = SparkSession.builder().appName("iris").master("local").getOrCreate()
        try {
            val sc = spark.sparkContext
            import spark.implicits._

            // Parse CSV lines: 4 doubles (feature vector) + species label.
            // Note: split already yields Strings, so no .toString is needed on p(4).
            val irisData = sc.textFile(file)
              .map(_.split(","))
              .map(p => iris(Vectors.dense(p(0).toDouble, p(1).toDouble, p(2).toDouble, p(3).toDouble), p(4)))
              .toDF()
              .cache() // reused by fit, transform, and evaluate below

            // Renamed from `KMeansModel`: that val shadowed the imported class of
            // the same name. k = 6 kept from the original (iris has 3 species;
            // the choice of k is deliberate here, not corrected).
            val model: KMeansModel = new KMeans()
              .setK(6)
              .setFeaturesCol("features")
              .setPredictionCol("prediction")
              .fit(irisData)

            // Show each row's label column (index 1) and assigned cluster (index 2).
            val result = model.transform(irisData)
            result.collect().foreach { row =>
                println(row(1) + " => cluster " + row(2))
            }

            // Centroids of the learned clusters in feature space.
            model.clusterCenters.foreach(println)

            // Silhouette score in [-1, 1]; values near 1 mean well-separated clusters.
            val silhouette = new ClusteringEvaluator().evaluate(result)
            println(silhouette)
        } finally {
            // Fix: the original never stopped the session, leaking the local
            // Spark context. Always release it, even if parsing or fit fails.
            spark.stop()
        }
    }
}
